# Credits Analysis: Data Loading Functions
# Alex F. Gazmararian
# agazmararian@gmail.com

#' Load the most recent cached annotations
#'
#' Searches two cache locations, resolved with here::here(), in order of
#' preference:
#'   1. data/cache/annotations/<YYYYMMDD>/<filename>, read as CSV with
#'      readr::read_csv().
#'   2. data/cache/api/<YYYYMMDD>/annotated_post.rds, the legacy RDS layout
#'      kept for backward compatibility, read with readRDS().
#' Within each location the dated folders are tried from newest to oldest and
#' the first one that actually contains the file is used, so an empty or
#' incomplete newest folder does not hide older annotations.
#'
#' @param filename CSV filename to look for (default: "annotated_statements.csv")
#' @return Data frame of annotations, or NULL (with a warning) when no cached
#'   file is found in either location.
load_most_recent_annotations <- function(filename = "annotated_statements.csv") {
  # Preferred location: dated folders holding CSV exports.
  csv_path <- .find_latest_cached_file(
    here::here("data", "cache", "annotations"),
    filename
  )
  if (!is.null(csv_path)) {
    message(sprintf("Loading annotations from %s", csv_path))
    return(readr::read_csv(csv_path))
  }

  # Fallback: legacy cache layout, which stored a fixed-name RDS file.
  rds_path <- .find_latest_cached_file(
    here::here("data", "cache", "api"),
    "annotated_post.rds"
  )
  if (!is.null(rds_path)) {
    message(sprintf("Loading annotations from old cache structure: %s", rds_path))
    return(readRDS(rds_path))
  }

  # Nothing usable in either location: warn and hand back NULL.
  warning(
    "No annotation files found in data/cache/annotations or data/cache/api",
    call. = FALSE
  )
  NULL
}

# Locate `filename` inside the newest YYYYMMDD-named subfolder of `cache_dir`
# that contains it. Returns the file path as a string, or NULL when
# `cache_dir` is missing, has no dated subfolders, or none of them holds
# the file.
.find_latest_cached_file <- function(cache_dir, filename) {
  if (!dir.exists(cache_dir)) {
    return(NULL)
  }

  # Keep only immediate subfolders whose name is exactly eight digits.
  date_folders <- list.dirs(cache_dir, full.names = FALSE, recursive = FALSE)
  date_folders <- date_folders[grepl("^\\d{8}$", date_folders)]
  if (length(date_folders) == 0) {
    return(NULL)
  }

  # Zero-padded YYYYMMDD names sort chronologically as plain strings, so a
  # descending sort puts the newest folder first.
  candidates <- file.path(
    cache_dir,
    sort(date_folders, decreasing = TRUE),
    filename
  )
  existing <- candidates[file.exists(candidates)]
  if (length(existing) == 0) {
    return(NULL)
  }
  existing[[1]]
}
